
# Document loaders: this script reads its source text from a PDF file.
from langchain.document_loaders import PyPDFLoader,Docx2txtLoader

def genetateData(path: str) -> str:
    """Ask the Tongyi LLM for a ten-item Q&A summary of a PDF's first page.

    The PDF at *path* is loaded with ``PyPDFLoader``; the text of the first
    loaded document is printed, substituted into a prompt that requests ten
    question/answer pairs in JSON form (``instruction`` / ``input`` /
    ``output``), and sent to the model. The model's reply is printed and
    returned.

    Only ``docs[0]`` is sent to the model; any further documents returned by
    the loader are ignored.

    Args:
        path: Location of the PDF file to summarise.

    Returns:
        The value returned by ``llm.invoke`` (presumably the reply text;
        confirm against the installed LangChain version).

    Raises:
        RuntimeError: If the ``DASHSCOPE_API_KEY`` environment variable is
            not set or is empty.
        ValueError: If the loader produced no documents for *path*.
    """
    import os

    # Credentials must never be committed to source control; take the key
    # from the environment and fail before doing any expensive work.
    api_key = os.environ.get("DASHSCOPE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "DASHSCOPE_API_KEY environment variable is not set; "
            "export your DashScope API key before running."
        )

    docs = PyPDFLoader(path).load()
    if not docs:
        # Guard the docs[0] access below with an explicit, readable error.
        raise ValueError(f"no content could be loaded from {path!r}")

    first_page = docs[0].page_content
    print(first_page)

    query = """
    有文章如下，请总结此文章以10个问答的方式，要求：以json格式,instruction是问题，input为空，output是结果

    {text}
    """

    # Imported lazily, as in the original, so that importing this module does
    # not require the LLM/prompt packages to be loaded up front.
    from langchain.prompts import PromptTemplate
    from langchain.llms import Tongyi

    template = PromptTemplate(template=query, input_variables=["text"])
    llm = Tongyi(api_key=api_key)

    answer = llm.invoke(template.format(text=first_page))
    print(answer)
    return answer

if __name__ == "__main__":
    # Script entry point. genetateData requires the PDF path, so it is taken
    # from the command line; calling it with no argument raises TypeError.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-pdf>")
    genetateData(sys.argv[1])